Test Gene2ModuleExpressionScores2 on Kali Immunity (Tran et all Immunity 2019) Dataset
#local path: "/Volumes/GoogleDrive/My Drive/Manuscripts/RNASeq Manuscript/Final Data Sets/Infected DGE Analysis edgeR Paired n71 7072 genes 2018-07-30 eset.rds"
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1ex6BT-0sWSnrl7LNT0xoBItv2R2lqRPT"), path = temp, overwrite = TRUE)
## ! Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to
## the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googledrive package is using a cached token for 'tuantran@iu.edu'.
## File downloaded:
## • 'Infected DGE Analysis edgeR Paired n71 7072 genes 2018-07-30 eset.rds'
## <id: 1ex6BT-0sWSnrl7LNT0xoBItv2R2lqRPT>
## Saved locally as:
## • '/var/folders/nc/wdrlkm350r57dvty_cc9cr68nljq2f/T//RtmpPVAEDO/file99072c41c613.rds'
paired_eset <- readRDS(file = dl$local_path)
# Make DGEList object
y <- DGEList(counts=counts(paired_eset), genes= fData(paired_eset), group= paste(paired_eset$Class, paired_eset$Infection.Status, sep = "_"),remove.zeros=T)
## Removing 4584 rows with all zero counts
# tapply(y$samples$lib.siz, INDEX= y$samples$group, summary)
# plot(y$samples$group, y$samples$lib.siz)
# summary(y$samples$group)
##########
# Filter #
##########
y <- y[rowSums(cpm(y)>1) >= min(summary(y$samples$group)),] #Filter low expression tags: keep genes with at least 1 CPM in at least X, where X is the number of samples in the smallest group
y <- y[rowSums(is.na(y $genes))==0, ] #Filter those with annotation (they all have annotation so this is optional)
dim(y)
## [1] 14677 142
o <- order(rowSums(y$counts)) #filter duplicates (chose transcript with highest overall count per user guide)
y <- y[o,]
d <- duplicated(y$genes$GeneSymbol)
y <- y[!d,]
y$samples$lib.size <- colSums(y$counts)
y <- calcNormFactors(y) #Normalization
cpm_dat <- cpm(y, prior.count = 2,log=TRUE)
# create ExpressionSet and view data for moderated cpm
fData_cpm <- featureData(paired_eset)[rownames(as.matrix(cpm_dat)),]
logcpm_eset <- new("ExpressionSet", phenoData = phenoData(paired_eset), exprs = as.matrix(cpm_dat), featureData = fData_cpm)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
plot_dat <- pData(logcpm_eset) %>%
left_join(., hiBTM_dat %>%
t() %>%
scale() %>%
as.data.frame() %>%
rownames_to_column(var = "xid"),
by = "xid")
library(pheatmap)
hiBTM_dat %>%
t() %>%
scale() %>%
t() %>%
pheatmap()
library(pheatmap)
lowBTM_dat %>%
t() %>%
scale() %>%
t() %>%
pheatmap()